/*=========================================================================
Elite Colleges as Engines of Upward Mobility: Evidence from Colombia's Ser Pilo Paga
Authors: Juliana Londoño-Vélez, Catherine Rodriguez, Fabio Sánchez
and Luis Esteban Álvarez-Arango

Creation date: June 6th, 2025
--------------------------------------------------------------------------
Value added models: Inputs for Table 5 and figures 8a, 8b, A13 and D
=========================================================================*/

* Sample conditions and numeric limits, one pair per running variable.
* NOTE(review): none of these four globals is referenced in the visible part
* of this script; presumably they are consumed by other do-files - confirm.
global cond_saber11 "eligible_sisben==1"
global cond_sisben  "eligible_saber11==1"
global lim_saber11  40
global lim_sisben   20

* Load the Saber 11 records and restrict them to the 20122 and 20132 periods.
use "${data2}/S11_20101_20202.dta", clear
keep if inlist(periodo11, "20122", "20132")

* Numeric copy of the exam period (only the two values kept above remain).
gen icfes_per=cond(periodo11=="20132", 20132, 20122)

* An all-zero school code is treated as "no school code".
replace cole_cod_dane_sede11="" if cole_cod_dane_sede11=="000000000000"

* Housing stratum: answers containing "zona" and empty answers both become "0";
* the stratum is then read from the last character of the answer.
replace fami_estratovivienda11="0" if regexm(fami_estratovivienda11,"zona") | fami_estratovivienda11==""
gen school_stratum=substr(fami_estratovivienda11,-1,1)
destring school_stratum, replace

* Convert each parent's reported education level to a numeric scale
* (the values look like approximate years of schooling - TODO confirm).
* Any answer not listed below, including "Ninguno" and empty answers, stays at 0.
foreach parent in padre madre {
	local answer fami_educacion`parent'11
	local years  school_edu`parent'
	gen `years'=0
	replace `years'=0 if `answer'=="Ninguno"
	replace `years'=2.5 if `answer'=="Primaria incompleta"
	replace `years'=5 if `answer'=="Primaria completa"
	replace `years'=8.5 if `answer'=="Secundaria (Bachillerato) incompleta"
	replace `years'=11 if `answer'=="Secundaria (Bachillerato) completa"
	replace `years'=12.5 if `answer'=="Técnica o tecnológica incompleta"
	replace `years'=14 if `answer'=="Técnica o tecnológica completa"
	replace `years'=13.5 if `answer'=="Educación profesional incompleta"
	replace `years'=16 if `answer'=="Educación profesional completa"
	replace `years'=18 if `answer'=="Postgrado"
}

* Convert the period and every score column from string to numeric.
destring periodo11, replace
destring recaf_punt_sociales_ciudadanas11 recaf_punt_ingles11 ///
	recaf_punt_lectura_critica11 recaf_punt_matematicas11 ///
	recaf_punt_c_naturales11, replace
destring punt_global11 punt_matematicas11 punt_ingles11 ///
	punt_lectura_critica11 punt_c_naturales11 ///
	punt_sociales_ciudadanas11 punt_comp_ciudadana11, replace

* Overwrite each subject score with its recaf_ counterpart, then discard the
* recaf_ column.
foreach subject in matematicas lectura_critica c_naturales sociales_ciudadanas ingles {
	replace punt_`subject'11=recaf_punt_`subject'11
	drop recaf_punt_`subject'11
}

* Rebuild the global score from the subject scores: weight 3 on each of the
* four core subjects and 1 on English, scaled by 5/13 and rounded.
replace punt_global11=round((3*punt_matematicas11 + 3*punt_lectura_critica11 + 3*punt_c_naturales11 + 3*punt_sociales_ciudadanas11 + punt_ingles11)*5/13)
gen school_sb11=punt_global11

* Keep the student's own values under separate names, because the school_
* variables are overwritten with peer averages below.
gen stratum=school_stratum
gen edupadre=school_edupadre
gen edumadre=school_edumadre

* Number of students in each school-by-period cell (the same for every
* variable, so it is computed once).
gen N=1
ereplace N=total(N), by(cole_cod_dane_sede11 icfes_per)

* Leave-one-out mean: cell total minus the student's own value, divided by
* the number of other students in the cell.
foreach v in school_stratum school_edupadre school_edumadre school_sb11 {
	gegen cell_sum=total(`v'), by(cole_cod_dane_sede11 icfes_per)
	replace `v'=(cell_sum-`v')/(N-1)
	drop cell_sum
}
drop N

* One row per remaining combination, stored for the merge on estu_snp.
keep estu_snp stratum edupadre edumadre ///
	school_stratum school_edupadre school_edumadre school_sb11
gduplicates drop
tempfile school
save `school'

****
* Load the RD data for the two periods and attach the student and school peer
* variables by estu_snp (rows found only in the using file are discarded).
use "${data}/data_RD", clear
keep if inlist(icfes_per, 20122, 20132)
merge 1:1 estu_snp using `school', keep(1 3) nogen

* Rows with access_6==0, or with no code at all, receive the pseudo-code 0.
foreach code in codigo_ies codigo_programa {
	replace `code'=0 if access_6==0 | `code'==.
}

* icfes_schoolsch6 flags rows where all five icfes_schoolsch indicators are
* missing; the indicators themselves are then zero-filled.
gen icfes_schoolsch6=(icfes_schoolsch1==. &  icfes_schoolsch2==. & icfes_schoolsch3==. & icfes_schoolsch4==. & icfes_schoolsch5==. )
forvalues k=1/5 {
	replace icfes_schoolsch`k'=0 if icfes_schoolsch`k'==.
}

* Flag missing school_urban, then zero-fill it and icfes_privatehs.
gen no_school=(school_urban==.)
replace school_urban=0 if school_urban==.
replace icfes_privatehs=0 if icfes_privatehs==.

* SISBEN score with missing values set to 100, its school-by-period mean,
* and an indicator for the originally missing score.
gen sisben_score_100=cond(sisben_score==., 100, sisben_score)
gegen school_sisben=mean(sisben_score_100), by(cole_cod_dane_sede11 icfes_per)
gen m_sisben=(sisben_score==.)
* Institution category label taken from the four indicator variables; when
* more than one indicator is 1 the last one in the list wins. Rows with
* institution code 0 get no category.
gen categoria=""
foreach tier in hq_pri hq_pub lq_pri lq_pub {
	replace categoria="`tier'" if `tier'==1
}
replace categoria="" if codigo_ies==0

* Institution-by-field group ids, built the same way for area_snies and nbc.
local fields area_snies nbc
local stubs  area nbc
forvalues j=1/2 {
	local field : word `j' of `fields'
	local stub  : word `j' of `stubs'

	* Rows with access_6==0, empty fields and "Sin clasificar" all become "NA".
	replace `field'="NA" if access_6==0 | `field'=="" | `field'=="Sin clasificar"
	encode `field', gen(cod_`stub')
	gegen codigo_`stub'=group(codigo_ies cod_`stub')

	* Visual check that the code-0 / "NA" cell received group id 1, which the
	* recode to 0 on the next line takes for granted.
	tab codigo_`stub' if codigo_ies==0 & `field'=="NA"
	replace codigo_`stub'=0 if codigo_`stub'==1
}
* Size of each institution and each program.
* Real units (code != 0): number of rows with access_6 equal to 1.
* Pseudo-unit 0: number of rows carrying code 0.
gen p=1
gegen estudiantes_ies=total(access_6), by(codigo_ies)
gegen estudiantes_prog=total(access_6), by(codigo_programa)

* The code-0 override is applied through scratch totals and a conditional
* replace. An in-place "ereplace ... if code==0" is an egen-style call, and
* egen-style functions leave rows outside the if-condition missing, which would
* blank the counts of every real institution/program and silently disable the
* minimum-size filter applied later in this script.
* NOTE(review): confirm against the installed ereplace version; this form is
* correct under either behaviour.
gegen rows_ies=total(p), by(codigo_ies)
gegen rows_prog=total(p), by(codigo_programa)
replace estudiantes_ies=rows_ies if codigo_ies==0
replace estudiantes_prog=rows_prog if codigo_programa==0
drop rows_ies rows_prog

rename codigo_programa codigo_prog

* Short name for the final Saber 11 global score; missing distance becomes 0.
rename puntaje_global_s11_final punt_sb11
replace distancia=0 if distancia==.

* Squared and cubed terms, named by appending the power to the variable name.
foreach v in punt_sb11 icfes_age distancia {
	forvalues power=2/3 {
		gen `v'`power'=`v'^`power'
	}
}

* Remove rows that report graduation_exam_7==1 while access_6==0.
drop if graduation_exam_7==1 & access_6==0

* College outcomes are set to missing for rows without the relevant access
* flag or with pseudo-code 0 for the institution or the program.
replace graduation_exam_7=. if access_6==0 | codigo_ies==0 | codigo_prog==0
foreach y in graduation_exam_pro_7 score_saberpro_5 {
	replace `y'=. if access_pro_6==0 | codigo_ies==0 | codigo_prog==0
}

* Cohort indicator: 1 for exam period 20132, 0 for 20122 (the only two periods
* kept when the RD data are loaded). Comparing against 2013 can never match a
* five-digit period code, which left p2013 equal to 0 on every row and made
* the control drop out of the value-added regressions.
gen p2013=(icfes_per==20132)

* Nested control sets for the value-added regressions.
* aa: no controls; a: individual covariates; b: adds high-school indicators;
* c: adds school_ peer variables; d to f: add college_ peer variables.
global model_aa ""
global model_a punt_sb11 punt_sb112 punt_sb113 ///
	icfes_age icfes_age2 icfes_age3 ///
	ethnminority icfes_female stratum edupadre edumadre family_size ///
	p2013 sisben_score_100 m_sisben
global model_b ${model_a} ///
	icfes_schoolsch1 icfes_schoolsch2 icfes_schoolsch3 icfes_schoolsch4 icfes_schoolsch6 ///
	icfes_privatehs no_school school_urban
global model_c ${model_b} ///
	school_stratum school_edupadre school_edumadre school_sb11 school_sisben
global model_d ${model_c} college_sb11
global model_e ${model_d} college_stratum college_edupadre college_edumadre
global model_f ${model_e} college_sisben_score_100 college_m_sisben

**# VA models

* Minimum program size, and the unit identifier used when the results are kept.
local nn 10
global id codigo_prog

* Drop real programs (code != 0) whose size is below the minimum.
drop if estudiantes_prog<`nn' & codigo_prog!=0

gen sb11=punt_sb11

* Number of rows in each program-by-period cell (identical for every variable,
* so it is computed once).
gen N=1
ereplace N=total(N), by(codigo_prog icfes_per)

* Leave-one-out program-by-period means, stored with the college_ prefix:
* cell total minus the own value, divided by the number of other rows.
foreach v in sb11 stratum edupadre edumadre sisben_score_100 m_sisben {
	gegen cell_sum=total(`v'), by(codigo_prog icfes_per)
	gen college_`v'=(cell_sum-`v')/(N-1)
	drop cell_sum
}
drop N

* Share of rows with a missing SISBEN score within each program:
* count of m_sisben==1 divided by the number of rows in the program.
gen rows_prog=1
ereplace rows_prog=total(rows_prog), by(codigo_prog)
gegen miss_prog=total(m_sisben), by(codigo_prog)
gen proporcion_missing=miss_prog/rows_prog
drop miss_prog rows_prog

* Estimate one regression per outcome and control set, absorbing program fixed
* effects and saving them as <outcome>_<model>_va.
local models aa a b c d e f
foreach outcome in graduation_exam_7 graduation_exam_pro_7 score_saberpro_5 work_9 mw_9 {
	* Only the two labor-market outcomes are re-centered on pseudo-program 0.
	local rebase=inlist("`outcome'", "work_9", "mw_9")
	foreach spec of local models {
		local va `outcome'_`spec'_va
		cap drop `va'
		dis as error "variable `outcome' model `spec' "
		reghdfe `outcome' ${model_`spec'}, abs(`va'=codigo_prog)
		if `rebase' {
			* Express the fixed effects relative to the one of program code 0.
			levelsof `va' if codigo_prog==0, local(base)
			replace `va'=`va' - `base'
		}
		* Spread the program's value to all of its rows, including rows where
		* the saved fixed effect is missing.
		ereplace `va'=max(`va'), by(codigo_prog)
	}
}

* Reduce to the program identifier, its descriptors and every saved
* value-added variable, collapse to distinct rows, and store the result.
keep ${id} categoria proporcion_missing estudiantes_prog ///
	graduation_exam_7_*_va graduation_exam_pro_7_*_va ///
	score_saberpro_5_*_va work_9_*_va mw_9_*_va

gduplicates drop

save "${Intermediate}/va_prog", replace
